{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evidently Metric Presets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    import evidently\n",
    "except:\n",
    "    !pip install git+https://github.com/evidentlyai/evidently.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn import datasets, ensemble, model_selection\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "from evidently.report import Report\n",
    "from evidently.metric_preset import DataDriftPreset\n",
    "from evidently.metric_preset import DataQualityPreset\n",
    "from evidently.metric_preset import RegressionPreset\n",
    "from evidently.metric_preset import ClassificationPreset\n",
    "from evidently.metric_preset import TargetDriftPreset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dataset for Data Quality and Integrity\n",
    "adult_data = datasets.fetch_openml(name='adult', version=2, as_frame=True)\n",
    "adult = adult_data.frame\n",
    "\n",
    "adult_ref = adult[~adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
    "adult_cur = adult[adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
    "\n",
    "adult_cur.iloc[:2000, 3:5] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dataset for regression\n",
    "housing_data = datasets.fetch_california_housing(as_frame=True)\n",
    "housing = housing_data.frame\n",
    "\n",
    "housing.rename(columns={'MedHouseVal': 'target'}, inplace=True)\n",
    "housing['prediction'] = housing_data['target'].values + np.random.normal(0, 3, housing.shape[0])\n",
    "\n",
    "housing_ref = housing.sample(n=5000, replace=False)\n",
    "housing_cur = housing.sample(n=5000, replace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dataset for Binary Probabilistic Classifcation\n",
    "bcancer_data = datasets.load_breast_cancer(as_frame=True)\n",
    "bcancer = bcancer_data.frame\n",
    "\n",
    "bcancer_ref = bcancer.sample(n=300, replace=False)\n",
    "bcancer_cur = bcancer.sample(n=200, replace=False)\n",
    "\n",
    "bcancer_label_ref = bcancer_ref.copy(deep=True)\n",
    "bcancer_label_cur = bcancer_cur.copy(deep=True)\n",
    "\n",
    "model = ensemble.RandomForestClassifier(random_state=1, n_estimators=10)\n",
    "model.fit(bcancer_ref[bcancer_data.feature_names.tolist()], bcancer_ref.target)\n",
    "\n",
    "bcancer_ref['prediction'] = model.predict_proba(bcancer_ref[bcancer_data.feature_names.tolist()])[:, 1]\n",
    "bcancer_cur['prediction'] = model.predict_proba(bcancer_cur[bcancer_data.feature_names.tolist()])[:, 1]\n",
    "\n",
    "bcancer_label_ref['prediction'] = model.predict(bcancer_label_ref[bcancer_data.feature_names.tolist()])\n",
    "bcancer_label_cur['prediction'] = model.predict(bcancer_label_cur[bcancer_data.feature_names.tolist()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dataset for multiclass classifcation\n",
    "iris_data = datasets.load_iris(as_frame=True)\n",
    "iris = iris_data.frame\n",
    "\n",
    "iris_ref = iris.sample(n=150, replace=False)\n",
    "iris_cur = iris.sample(n=150, replace=False)\n",
    "\n",
    "model = ensemble.RandomForestClassifier(random_state=1, n_estimators=3)\n",
    "model.fit(iris_ref[iris_data.feature_names], iris_ref.target)\n",
    "\n",
    "iris_ref['prediction'] = model.predict(iris_ref[iris_data.feature_names])\n",
    "iris_cur['prediction'] = model.predict(iris_cur[iris_data.feature_names])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How to run Reports (ex. Dashboard)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_drift_report = Report(metrics=[\n",
    "    DataDriftPreset(num_stattest='ks', cat_stattest='psi', num_stattest_threshold=0.2, cat_stattest_threshold=0.2),\n",
    "])\n",
    "\n",
    "data_drift_report.run(reference_data=adult_ref, current_data=adult_cur)\n",
    "data_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#data_drift_report.save_html('data_drift_report.html')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How to create a json Report (ex. Profile)?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_drift_report.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#data_drift_report.save_json('data_drift_report.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How to get a python object with Report's main data?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "data_drift_report.as_dict()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## How to generate a Report with raw data visualizations?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_drift_report = Report(metrics=[\n",
    "    DataDriftPreset(num_stattest='ks', \n",
    "                    cat_stattest='psi', \n",
    "                    num_stattest_threshold=0.2, \n",
    "                    cat_stattest_threshold=0.2),\n",
    "],\n",
    "  options={\"render\": {\"raw_data\": True}}\n",
    "                           )\n",
    "\n",
    "data_drift_report.run(reference_data=adult_ref, current_data=adult_cur)\n",
    "data_drift_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## What Reports are available?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_quality_report = Report(metrics=[\n",
    "    DataQualityPreset(),\n",
    "])\n",
    "\n",
    "data_quality_report.run(reference_data=adult_ref, current_data=adult_cur)\n",
    "data_quality_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regression_performance_report = Report(metrics=[\n",
    "    RegressionPreset(),\n",
    "])\n",
    "\n",
    "regression_performance_report.run(reference_data=housing_ref.sort_index(), current_data=housing_cur.sort_index())\n",
    "regression_performance_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "classification_performance_report = Report(metrics=[\n",
    "    ClassificationPreset(probas_threshold=0.7),\n",
    "])\n",
    "\n",
    "classification_performance_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)\n",
    "\n",
    "classification_performance_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_target_drift_report = Report(metrics=[\n",
    "    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),\n",
    "])\n",
    "\n",
    "num_target_drift_report.run(reference_data=housing_ref, current_data=housing_cur)\n",
    "num_target_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "multiclass_cat_target_drift_report = Report(metrics=[\n",
    "    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),\n",
    "])\n",
    "\n",
    "multiclass_cat_target_drift_report.run(reference_data=iris_ref, current_data=iris_cur)\n",
    "multiclass_cat_target_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "binary_cat_target_drift_report = Report(metrics=[\n",
    "    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),\n",
    "])\n",
    "\n",
    "binary_cat_target_drift_report.run(reference_data=bcancer_label_ref, current_data=bcancer_label_cur)\n",
    "binary_cat_target_drift_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prob_binary_cat_target_drift_report = Report(metrics=[\n",
    "    TargetDriftPreset(num_stattest='ks', cat_stattest='psi'),\n",
    "])\n",
    "\n",
    "prob_binary_cat_target_drift_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)\n",
    "prob_binary_cat_target_drift_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Support Evidently\n",
    "Did you find the example useful? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
